// Copyright (c) Meta Platforms, Inc. and affiliates.

#include <array>
#include <cstddef>
#include <cstdint>
#include <string>
#include <string_view>
#include <vector>

#include <gtest/gtest.h>

#include "custom_parsers/csv/csv_lexer.h"
#include "openzl/zl_errors.h"
#include "tests/utils.h"

using namespace ::testing;

// Runs the null-aware lexer over three records of a wide (237-column), mostly
// empty CSV and compares the reported string lengths, dispatch indices and
// newline positions with the hand-written expectations below.
TEST(LexTest, test)
{
    // Every record has the same shape: columns 0..10 populated, then a run of
    // 13 separators, column 23, a run of 91 separators, column 114, a run of
    // 43 separators, and finally columns 157..236 all holding "0".
    constexpr std::string_view str =
            R"(H,2019GQ0000088,6,02600,3,01,1195583,1207712,0,1,3,,,,,,,,,,,,,1,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
H,2019GQ0000096,6,00700,3,01,1195583,1207712,0,1,2,,,,,,,,,,,,,2,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
H,2019GQ0000153,6,00800,3,01,1195583,1207712,0,1,3,,,,,,,,,,,,,2,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
)";

    // clang-format off
    // Expected length of every reported string: one leading header entry,
    // then 186 entries per record laid out as (field, following separator run)
    // pairs. 1 + 3 * 186 == 559.
    constexpr std::array<uint32_t, 559> expectedStrLens = {
    // Header
    0,
    // row 1
    //   columns 0..10 (column 10 is followed by 13 separators)
    1, 1,  13, 1,  1, 1,  5, 1,  1, 1,  2, 1,  7, 1,  7, 1,  1, 1,  1, 1,  1, 13,
    //   column 23 (+ 91 separators), column 114 (+ 43 separators)
    1, 91,  1, 43,
    //   columns 157..236, ten columns per line
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1,   1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 157..166
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1,   1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 167..176
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1,   1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 177..186
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1,   1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 187..196
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1,   1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 197..206
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1,   1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 207..216
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1,   1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 217..226
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1,   1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 227..236
    // row 2
    //   columns 0..10 (column 10 is followed by 13 separators)
    1, 1,  13, 1,  1, 1,  5, 1,  1, 1,  2, 1,  7, 1,  7, 1,  1, 1,  1, 1,  1, 13,
    //   column 23 (+ 91 separators), column 114 (+ 43 separators)
    1, 91,  1, 43,
    //   columns 157..236, ten columns per line
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1,   1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 157..166
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1,   1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 167..176
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1,   1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 177..186
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1,   1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 187..196
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1,   1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 197..206
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1,   1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 207..216
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1,   1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 217..226
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1,   1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 227..236
    // row 3
    //   columns 0..10 (column 10 is followed by 13 separators)
    1, 1,  13, 1,  1, 1,  5, 1,  1, 1,  2, 1,  7, 1,  7, 1,  1, 1,  1, 1,  1, 13,
    //   column 23 (+ 91 separators), column 114 (+ 43 separators)
    1, 91,  1, 43,
    //   columns 157..236, ten columns per line
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1,   1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 157..166
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1,   1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 167..176
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1,   1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 177..186
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1,   1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 187..196
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1,   1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 197..206
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1,   1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 207..216
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1,   1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 217..226
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1,   1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 227..236
    };

    // Expected dispatch index of every reported string, parallel to
    // expectedStrLens: fields carry their column number, separator runs carry
    // 237 (the value of nbColumns).
    constexpr std::array<uint16_t, 559> expectedDispatchIndices = {
    // Header
    0,
    // row 1
    0, 237,  1, 237,  2, 237,  3, 237,  4, 237,  5, 237,  6, 237,  7, 237,  8, 237,  9, 237,  10, 237,
    23, 237,  114, 237,
    157, 237,  158, 237,  159, 237,  160, 237,  161, 237,  162, 237,  163, 237,  164, 237,  165, 237,  166, 237,
    167, 237,  168, 237,  169, 237,  170, 237,  171, 237,  172, 237,  173, 237,  174, 237,  175, 237,  176, 237,
    177, 237,  178, 237,  179, 237,  180, 237,  181, 237,  182, 237,  183, 237,  184, 237,  185, 237,  186, 237,
    187, 237,  188, 237,  189, 237,  190, 237,  191, 237,  192, 237,  193, 237,  194, 237,  195, 237,  196, 237,
    197, 237,  198, 237,  199, 237,  200, 237,  201, 237,  202, 237,  203, 237,  204, 237,  205, 237,  206, 237,
    207, 237,  208, 237,  209, 237,  210, 237,  211, 237,  212, 237,  213, 237,  214, 237,  215, 237,  216, 237,
    217, 237,  218, 237,  219, 237,  220, 237,  221, 237,  222, 237,  223, 237,  224, 237,  225, 237,  226, 237,
    227, 237,  228, 237,  229, 237,  230, 237,  231, 237,  232, 237,  233, 237,  234, 237,  235, 237,  236, 237,
    // row 2
    0, 237,  1, 237,  2, 237,  3, 237,  4, 237,  5, 237,  6, 237,  7, 237,  8, 237,  9, 237,  10, 237,
    23, 237,  114, 237,
    157, 237,  158, 237,  159, 237,  160, 237,  161, 237,  162, 237,  163, 237,  164, 237,  165, 237,  166, 237,
    167, 237,  168, 237,  169, 237,  170, 237,  171, 237,  172, 237,  173, 237,  174, 237,  175, 237,  176, 237,
    177, 237,  178, 237,  179, 237,  180, 237,  181, 237,  182, 237,  183, 237,  184, 237,  185, 237,  186, 237,
    187, 237,  188, 237,  189, 237,  190, 237,  191, 237,  192, 237,  193, 237,  194, 237,  195, 237,  196, 237,
    197, 237,  198, 237,  199, 237,  200, 237,  201, 237,  202, 237,  203, 237,  204, 237,  205, 237,  206, 237,
    207, 237,  208, 237,  209, 237,  210, 237,  211, 237,  212, 237,  213, 237,  214, 237,  215, 237,  216, 237,
    217, 237,  218, 237,  219, 237,  220, 237,  221, 237,  222, 237,  223, 237,  224, 237,  225, 237,  226, 237,
    227, 237,  228, 237,  229, 237,  230, 237,  231, 237,  232, 237,  233, 237,  234, 237,  235, 237,  236, 237,
    // row 3
    0, 237,  1, 237,  2, 237,  3, 237,  4, 237,  5, 237,  6, 237,  7, 237,  8, 237,  9, 237,  10, 237,
    23, 237,  114, 237,
    157, 237,  158, 237,  159, 237,  160, 237,  161, 237,  162, 237,  163, 237,  164, 237,  165, 237,  166, 237,
    167, 237,  168, 237,  169, 237,  170, 237,  171, 237,  172, 237,  173, 237,  174, 237,  175, 237,  176, 237,
    177, 237,  178, 237,  179, 237,  180, 237,  181, 237,  182, 237,  183, 237,  184, 237,  185, 237,  186, 237,
    187, 237,  188, 237,  189, 237,  190, 237,  191, 237,  192, 237,  193, 237,  194, 237,  195, 237,  196, 237,
    197, 237,  198, 237,  199, 237,  200, 237,  201, 237,  202, 237,  203, 237,  204, 237,  205, 237,  206, 237,
    207, 237,  208, 237,  209, 237,  210, 237,  211, 237,  212, 237,  213, 237,  214, 237,  215, 237,  216, 237,
    217, 237,  218, 237,  219, 237,  220, 237,  221, 237,  222, 237,  223, 237,  224, 237,  225, 237,  226, 237,
    227, 237,  228, 237,  229, 237,  230, 237,  231, 237,  232, 237,  233, 237,  234, 237,  235, 237,  236, 237,
    };

    // Position, within the reported string sequence, of the string that ends
    // each record (each record contributes 186 entries after the header one).
    const std::vector<size_t> expectedNewlines = { 186, 372, 558 };
    // clang-format on

    // Output buffers, sized well above the 559 strings expected.
    std::vector<uint32_t> stringLens(1100, 0);
    std::vector<uint16_t> dispatchIndices(1100, 0);
    std::vector<size_t> newLines(1100, 0);
    const uint8_t nbColumns = 237;
    const char sep          = ',';

    // Value-initialize so that fields this test never assigns (nbNewlines in
    // particular) start at zero instead of holding indeterminate values.
    ZL_CSV_lexResult lexResult{};
    lexResult.stringLens      = stringLens.data();
    lexResult.dispatchIndices = dispatchIndices.data();
    lexResult.nbColumns       = nbColumns;
    lexResult.newlineIndices  = newLines.data();

    const auto e = createNullAwareLexAndDispatch(
            &lexResult, str.data(), str.length(), sep);
    // Fatal on failure: the value extracted below is presumably only
    // meaningful for a successful report.
    ASSERT_FALSE(ZL_isError(e));
    const size_t nbStrs = ZL_validResult(e);
    // Fatal on mismatch: the loops below index the fixed-size expectation
    // arrays with nbStrs, so a larger count would read out of bounds.
    ASSERT_EQ(nbStrs, expectedStrLens.size());
    for (size_t i = 0; i < nbStrs; ++i) {
        EXPECT_EQ(expectedStrLens[i], stringLens[i]) << "string index " << i;
    }
    for (size_t i = 0; i < nbStrs; ++i) {
        EXPECT_EQ(expectedDispatchIndices[i], dispatchIndices[i])
                << "string index " << i;
    }
    // Trim the oversized buffer to the reported count before comparing.
    newLines.resize(lexResult.nbNewlines);
    EXPECT_EQ(expectedNewlines, newLines);
}

// Lexes a single four-column record with the null-aware lexer. The call is
// expected to report an error while the terminating newline is missing, and to
// succeed with the string lengths and dispatch indices below once it is there.
TEST(LexTest, singleLine)
{
    const std::string inputNoNewline = "aaa,bb,c,\"d\"";
    const std::string input          = inputNoNewline + "\n";
    // One leading 0 entry, then each field followed by its one-character
    // separator. The quoted field is counted with its quotes ("d" -> 3).
    const std::vector<uint32_t> expectedStringLens = { 0, 3, 1, 2, 1,
                                                       1, 1, 3, 1 };
    // Fields are expected on their column number, separators on 4 (the value
    // of nbColumns).
    const std::vector<uint16_t> expectedDispatchIndices = { 0, 0, 4, 1, 4,
                                                            2, 4, 3, 4 };
    std::vector<uint32_t> stringLens(100, 0);
    std::vector<uint16_t> dispatchIndices(100, 0);
    std::vector<size_t> newLines(1100, 0);

    // Value-initialize so that fields this test never assigns start at zero
    // instead of holding indeterminate values.
    ZL_CSV_lexResult lexResult{};
    lexResult.stringLens      = stringLens.data();
    lexResult.dispatchIndices = dispatchIndices.data();
    lexResult.nbColumns       = 4;
    lexResult.newlineIndices  = newLines.data();

    // Without the trailing newline the lexer is expected to reject the input.
    auto e = createNullAwareLexAndDispatch(
            &lexResult, inputNoNewline.data(), inputNoNewline.size(), ',');
    EXPECT_TRUE(ZL_isError(e));

    e = createNullAwareLexAndDispatch(
            &lexResult, input.data(), input.size(), ',');
    // Fatal on failure: the value extracted below is presumably only
    // meaningful for a successful report.
    ASSERT_FALSE(ZL_isError(e));
    const size_t nbStrs = ZL_validResult(e);
    // Fatal on mismatch: the loops below index the expectation vectors with
    // nbStrs, so a larger count would read out of bounds.
    ASSERT_EQ(nbStrs, expectedStringLens.size());
    for (size_t i = 0; i < nbStrs; ++i) {
        EXPECT_EQ(expectedStringLens[i], stringLens[i])
                << "string index " << i;
    }
    for (size_t i = 0; i < nbStrs; ++i) {
        EXPECT_EQ(expectedDispatchIndices[i], dispatchIndices[i])
                << "string index " << i;
    }
}

// Parses a record whose second and third fields are empty quoted strings and
// checks that each pair of quotes is reported as a field of length 2.
TEST(LexTest, singlePairOfQuotes)
{
    std::string input = "aaa,\"\",\"\"\n";
    // One leading 0 entry, then each field followed by its one-character
    // separator: aaa , "" , "" \n
    const std::vector<uint32_t> expectedStringLens = { 0, 3, 1, 2, 1, 2, 1 };
    std::vector<uint32_t> stringLens(100, 0);
    std::vector<size_t> newLines(100, 0);

    // Value-initialize: this test supplies no dispatchIndices buffer, so the
    // pointer must be null rather than indeterminate, and the counters must
    // start at zero.
    ZL_CSV_lexResult lexResult{};
    lexResult.stringLens     = stringLens.data();
    lexResult.nbColumns      = 3;
    lexResult.newlineIndices = newLines.data();

    const auto e =
            createParsedCsv(&lexResult, input.data(), input.size(), ',');
    // Fatal on failure: the value extracted below is presumably only
    // meaningful for a successful report.
    ASSERT_FALSE(ZL_isError(e));
    const size_t nbStrs = ZL_validResult(e);
    // Fatal on mismatch: the loop below indexes the expectation vector with
    // nbStrs, so a larger count would read out of bounds.
    ASSERT_EQ(nbStrs, expectedStringLens.size());
    for (size_t i = 0; i < nbStrs; ++i) {
        EXPECT_EQ(expectedStringLens[i], stringLens[i])
                << "string index " << i;
    }
}

// Parses a nine-column record made of quoted fields, including doubled quotes
// inside a quoted field and a separator enclosed in quotes, and checks the
// reported length of every string.
TEST(LexTest, multiplePairsOfQuotes)
{
    std::string input =
            "aaa,\"\",\"\",\"\",\"\",\"\",\"\"\"\",\"\",\"\"\"\"\",\"\n";
    // One leading 0 entry, then each field followed by its one-character
    // separator. The last field is the 7-character text `""""","`, whose
    // comma is expected to stay inside the field.
    const std::vector<uint32_t> expectedStringLens = {
        0, 3, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 4, 1, 2, 1, 7, 1
    };
    std::vector<uint32_t> stringLens(100, 0);
    std::vector<size_t> newLines(100, 0);

    // Value-initialize: this test supplies no dispatchIndices buffer, so the
    // pointer must be null rather than indeterminate, and the counters must
    // start at zero.
    ZL_CSV_lexResult lexResult{};
    lexResult.stringLens     = stringLens.data();
    lexResult.nbColumns      = 9;
    lexResult.newlineIndices = newLines.data();

    const auto e =
            createParsedCsv(&lexResult, input.data(), input.size(), ',');
    // Fatal on failure: the value extracted below is presumably only
    // meaningful for a successful report.
    ASSERT_FALSE(ZL_isError(e));
    const size_t nbStrs = ZL_validResult(e);
    // Fatal on mismatch: the loop below indexes the expectation vector with
    // nbStrs, so a larger count would read out of bounds.
    ASSERT_EQ(nbStrs, expectedStringLens.size());
    for (size_t i = 0; i < nbStrs; ++i) {
        EXPECT_EQ(expectedStringLens[i], stringLens[i])
                << "string index " << i;
    }
}

// Parses two four-column records where the first one contains a newline inside
// a quoted field, and checks that only the two record-ending newlines are
// reported.
TEST(LexTest, newLineInQuotes)
{
    std::string input = "a, ,\"\n\",a \n b, , ,b \n";
    // Positions within the reported string sequence; the quoted "\n" field is
    // expected not to appear here.
    const std::vector<size_t> expectedNewLines = { 8, 16 };
    std::vector<size_t> newLines(100, 0);
    std::vector<uint32_t> stringLens(100, 0);

    // Value-initialize: nbNewlines drives the resize() below, so it must be
    // zero (not indeterminate) if the parse fails without setting it; the
    // unused dispatchIndices pointer becomes null as well.
    ZL_CSV_lexResult lexResult{};
    lexResult.stringLens     = stringLens.data();
    lexResult.nbColumns      = 4;
    lexResult.newlineIndices = newLines.data();

    EXPECT_ZS_VALID(
            createParsedCsv(&lexResult, input.data(), input.size(), ','));
    // Trim the oversized buffer to the reported count before comparing.
    newLines.resize(lexResult.nbNewlines);
    EXPECT_EQ(newLines, expectedNewLines);
}
